set more off

/*
// prepare tasks crosswalk
use ${orig}/tasks_kldb2010_3.dta, clear
keep if jahr == 2013
drop bezeichnung jahr
rename kldb2010_3 beruf2010_3
save ${data}/tasks_kldb2010_3_2013.dta, replace
*/

*-------------------------------*
* Merge to panel *
*-------------------------------*

// Stack the yearly cross-section files for 1991-2017 into one panel.
// jahr records the year of the file each observation was appended from.
clear
forvalues y = 1991/2017 {
	append using "${data}/siab_7517_v1_quer_`y'_06-30"
	// Create jahr on the first pass if the input files do not carry it.
	// On later passes only the freshly appended rows (jahr still missing)
	// are stamped. The loop year is used instead of a literal 1991 so the
	// first year stays correct if the range is ever changed, and an explicit
	// existence check replaces a blanket capture that hid every other error.
	capture confirm variable jahr, exact
	if _rc {
		gen jahr = `y'
	}
	else {
		replace jahr = `y' if jahr == .
	}
}

****************************************************
********      1.1. Controls from SIAB   ************
****************************************************

// Attach establishment information (industry sector, location) by
// establishment id and year. Rows that exist only in the using file are
// discarded and no _merge variable is left behind.
merge m:1 betnr jahr using ${orig}/SIAB_7517_v1_bhp_basis_v1.dta, ///
	keep(master match) nogenerate

// Attach task information by 3-digit occupation code, again keeping only
// rows that are present in the panel.
merge m:1 beruf2010_3 using ${data}/tasks_kldb2010_3_2013.dta, ///
	keep(master match) nogenerate

// Recode the extended missing codes .n and .z to the system missing value.
// Only numeric variables are visited: comparing a string variable with .n
// would stop the do-file with a type mismatch error.
ds, has(type numeric)
local numvars `r(varlist)'
foreach var of local numvars {
	replace `var' = . if `var' == .n | `var' == .z
}

// Harmonise identifier names used from here on: ao_kreis -> region, jahr -> year
rename jahr year
rename ao_kreis region

// SKILL
// Indicators by requirement level (niveau): 1 if the observation is in the
// group, missing otherwise, so that a sum over a cell yields a head count.
gen n_lowskill  = 1 if niveau == 1
gen n_midskill  = 1 if niveau == 2
gen n_highskill = 1 if inlist(niveau, 3, 4)


/* EMPLOYMENT VARIABLES
Value labels of erwstat (some labels are truncated in the source listing):
  1   ALG  - unemployment benefit
  2   ALHI - unemployment assistance
  3   UHG  - maintenance allowance
  31  unemployed (ALO)
  33  not unemployed, seeking work
  35  seeking advice (RASU)
  51  ALO  (end date anonymised)
  53  NALO (end date anonymised)
  101 employees subject to social security (label truncated)
  102 trainees (label truncated)
  103 employees in partial retirement (label truncated)
  106 working students
  109 marginal part-time employees
  118 casual workers
  119 pensioners exempt from insurance
  209 marginal part-time employees (label truncated after "Ha")
*/

// Full-time-equivalent employment: 1 for erwstat in 101-118 or 209,
// 0.5 if such an observation is flagged part-time, 0 for everybody else.
// Note that the range 101-118 leaves out code 119.
gen emp = 0
replace emp = 1 if inrange(erwstat,101,118) | erwstat == 209
replace emp = .5 if emp == 1 & teilzeit == 1
gen n_emp = 1 if emp > 0

// Stata treats missing as larger than any number, so a bare "x > 0" is also
// true when x is missing. The subgroup variables below are missing for
// everybody outside the subgroup, hence every head count is guarded with
// !missing(); otherwise it would count all observations outside the group.
gen emp_manufacturing = emp if w08_3_gen >= 100 & w08_3_gen < 360
gen n_emp_manufacturing = 1 if emp_manufacturing > 0 & !missing(emp_manufacturing)

// EMP by skill
gen emp_lq = emp if n_lowskill == 1
gen emp_mq = emp if n_midskill == 1
gen emp_hq = emp if n_highskill == 1
gen n_emp_lq = 1 if emp_lq > 0 & !missing(emp_lq)
gen n_emp_mq = 1 if emp_mq > 0 & !missing(emp_mq)
gen n_emp_hq = 1 if emp_hq > 0 & !missing(emp_hq)


// INDIVIDUALS
// One per observation, so the sum over a cell is the number of observations.
gen n_individuals = 1

/* UNEMPLOYED (currently disabled)
gen unemp = 1 if erwstat < 100 & erwstat > 0
gen unemp_lq = unemp if n_lowskill == 1
gen unemp_mq = unemp if n_midskill == 1
gen unemp_hq = unemp if n_highskill == 1

// attempt to fix the regional identifier of the unemployed (wo_kreis missing)
replace region = wo_kreis if unemp == 1
*/

// TASKS
// One indicator per main task category (haupttask 1 to 5): 1 if the
// observation has that main task, missing otherwise.
forvalues task = 1/5 {
	gen main_task_`task' = 1 if haupttask == `task'
}

// Number of observations with task-composition information. The five gwkomp
// variables come as a bundle, so gwkomp1 is checked on behalf of all of them.
// The former test "gwkomp1 != 0" was true for missing values and therefore
// counted exactly the observations that have no task information. Counting
// non-missing values matches the denominator of the cell means taken later.
// NOTE(review): observations with gwkomp1 == 0 are now counted - confirm.
gen n_gwkomp = 1 if !missing(gwkomp1)


// EMPLOYMENT BY EDUCATION
// Full-time-equivalent employment and head counts by schooling group
// (schule == 1, 4-6 and 7-9). The emp_edu_* variables are missing outside the
// respective group and Stata ranks missing above every number, so the head
// counts need the !missing() guard to avoid counting everybody else as well.
gen emp_edu_no = emp if schule == 1
gen n_emp_edu_no = 1 if emp_edu_no > 0 & !missing(emp_edu_no)

gen emp_edu_low = emp if inrange(schule,4,6)
gen n_emp_edu_low = 1 if emp_edu_low > 0 & !missing(emp_edu_low)

gen emp_edu_high = emp if inrange(schule,7,9)
gen n_emp_edu_high = 1 if emp_edu_high > 0 & !missing(emp_edu_high)

// FAMILY
// kids_yes previously used "kind > 1", which left out kind == 1 and counted
// a missing kind as having kids.
// NOTE(review): assumes kind == 0 means no children and any positive value
// means at least one child - confirm against the codebook.
gen kids_no = 1 if kind == 0
gen kids_yes = 1 if kind > 0 & !missing(kind)


// WAGE
// Log wage, computed as log(1 + gtentgelt), overall and restricted to
// manufacturing (w08_3_gen in [100, 360)) and to each skill group.
gen wage = log( 1 + gtentgelt)
gen wage_manufacturing = wage if w08_3_gen >= 100 & w08_3_gen < 360
gen wage_lq = wage if n_lowskill == 1
gen wage_mq = wage if n_midskill == 1
gen wage_hq = wage if n_highskill == 1

// Matching counters: 1 wherever the corresponding wage is observed, so the
// cell sum gives the number of observations behind each cell mean.
foreach w in wage wage_manufacturing wage_lq wage_mq wage_hq {
	gen n_`w' = 1 if !missing(`w')
}

// Stata ranks missing above every number, so any "x > c" test is also true
// when x is missing. All comparisons below are therefore guarded with
// !missing() so that unknown values are not assigned to a group.

// GENDER
gen emp_female = emp if frau == 1
gen n_emp_female = 1 if emp_female > 0 & !missing(emp_female)

// FOREIGN
// A missing nation_gr is no longer counted as foreign.
gen foreign = 1 if nation_gr > 10 & !missing(nation_gr)
gen emp_foreign = emp if nation_gr > 10 & !missing(nation_gr)
gen n_emp_foreign = 1 if emp_foreign > 0 & !missing(emp_foreign)

// AGE
// Age is year minus year of birth; a missing gebjahr is no longer treated
// as above 50. The head count now requires positive employment, in line with
// the other n_emp_* counters (before, it counted everybody above 50).
gen emp_above50 = emp if year - gebjahr > 50 & !missing(gebjahr)
gen n_emp_above50 = 1 if emp_above50 > 0 & !missing(emp_above50)

// Mean age of the employed, overall and in manufacturing. The manufacturing
// version previously also picked up everybody outside manufacturing, because
// emp_manufacturing is missing for them and missing > 0 is true.
gen mean_age = year - gebjahr if emp > 0
gen n_mean_age = 1 if mean_age != .
gen mean_age_manufacturing = year - gebjahr if emp_manufacturing > 0 & !missing(emp_manufacturing)
gen n_mean_age_manufacturing = 1 if mean_age_manufacturing != .
// Aggregate to region-year level.
// Counters and full-time-equivalent employment are summed; the task
// composition variables, log wages and ages are averaged. The variable order
// is the same as before; consecutive variables sharing a statistic are simply
// listed under one (sum) or (mean) keyword.
// The unemployment totals (unemp, unemp_lq, unemp_mq, unemp_hq) are disabled.
collapse ///
	(sum) n_individuals foreign emp_foreign n_emp_foreign ///
	      emp_female n_emp_female emp n_emp ///
	      emp_manufacturing n_emp_manufacturing ///
	      emp_above50 n_emp_above50 ///
	      emp_lq emp_mq emp_hq n_emp_lq n_emp_mq n_emp_hq ///
	(mean) gwkomp1 gwkomp2 gwkomp3 gwkomp4 gwkomp5 ///
	(sum) n_gwkomp ///
	      main_task_1 main_task_2 main_task_3 main_task_4 main_task_5 ///
	      emp_edu_no n_emp_edu_no emp_edu_low n_emp_edu_low ///
	      emp_edu_high n_emp_edu_high kids_no kids_yes ///
	(mean) wage (sum) n_wage ///
	(mean) wage_manufacturing (sum) n_wage_manufacturing ///
	(mean) wage_lq (sum) n_wage_lq ///
	(mean) wage_mq (sum) n_wage_mq ///
	(mean) wage_hq (sum) n_wage_hq ///
	(sum) n_lowskill n_midskill n_highskill ///
	(mean) mean_age (sum) n_mean_age ///
	(mean) mean_age_manufacturing (sum) n_mean_age_manufacturing ///
	, by(region year)

// Cells without a region identifier cannot be used
drop if region == .


// Variable labels for the aggregated region-year data.
// "ft-eq" = full-time equivalents (part-time observations count as 0.5).
label var foreign "Foreigners"
label var emp_foreign "Foreign employees (ft-eq)"
label var emp_female "Female employees (ft-eq)"
label var emp "Employees (ft-eq)"
label var emp_manufacturing "Employment manufacturing (ft-eq)"
label var emp_above50 "Employment above 50 (ft-eq)"
// Labels for the disabled unemployment variables:
*label var  unemp "Unemployed (erwstat < 100)"
*label var  unemp_lq "Unemployed lowskill (erwstat < 100)"
*label var  unemp_mq "Unemployed midskill (erwstat < 100)"
*label var  unemp_hq "Unemployed highskill (erwstat < 100)"
label var emp_lq "Employment lowskill (ft-eq)"
label var emp_mq "Employment midskill (ft-eq)"
label var emp_hq "Employment highskill (ft-eq)"

// Task composition dimensions and main-task counts, numbered 1 to 5
forvalues k = 1/5 {
	label var gwkomp`k' "Task composition dim `k'"
	label var main_task_`k' "Main task == `k' (from gwkomp)"
}

label var emp_edu_no "Employment no education (ft-eq)"
label var emp_edu_low "Employment low education (ft-eq)"
label var emp_edu_high "Employment high education (ft-eq)"
label var kids_no "No kids"
label var kids_yes "Has kids"
label var wage "Wage (log gtentgelt)"
// Previously carried the same label as the overall wage, which made the two
// variables indistinguishable in output.
label var wage_manufacturing "Wage manufacturing (log gtentgelt)"
label var wage_lq "Wage lowskill (log gtentgelt)"
label var wage_mq "Wage midskill (log gtentgelt)"
label var wage_hq "Wage highskill (log gtentgelt)"
label var mean_age "Mean age of employees"
label var mean_age_manufacturing "Mean age of manufacturing workers"


// Prefix every variable except the two cell identifiers with SIAB_, so the
// origin of each control stays visible after merging with other sources.
ds region year, not
local to_prefix `r(varlist)'
foreach v of local to_prefix {
	rename `v' SIAB_`v'
}

*** Create Bundeslaender
// The state code is the region code with its last three digits removed.
gen state_code = floor(region/1000)
label var state_code "Bundesland"

label define state_labels 1 "Schleswig-Holstein" 2 "Hamburg" 3 "Niedersachsen" 4 "Bremen" ///
	5 "Nordrhein-Westfalen"  6 "Hessen"  7 "Rheinland-Pfalz"  8 "Baden-Württemberg" ///
	9 "Bayern" 10 "Saarland"  11 "Berlin"  12 "Brandenburg" 13 "Mecklenburg-Vorpommern" ///
	14 "Sachsen"  15 "Sachsen-Anhalt"  16 "Thüringen"

// The full variable name is spelled out. The short form "state" only worked
// through variable-name abbreviation, which fails under "set varabbrev off"
// or as soon as another variable starting with "state" exists.
label values state_code state_labels

*** Create broad region
// Group the states into four broad regions and keep both the string version
// and a numeric, value-labelled version (broad_region).
gen broad_employment_region = ""
replace broad_employment_region = "north" if state_code <= 4
replace broad_employment_region = "west" if (state_code >= 5 & state_code <= 7) | state_code == 10
replace broad_employment_region = "south" if state_code >= 8 & state_code <= 9
replace broad_employment_region = "east" if state_code >= 11
encode(broad_employment_region), gen(broad_region)
label var broad_employment_region "Broad region"

save ${data}/SIAB_controls.dta, replace














